library(tidyverse)
library(tidymodels)
library(tidytext)
library(plotly)
library(ggpubr)
library(GGally)
library(ggdist)
library(embed)
library(here)tracks_summary_file <- here("data", "processed", "tracks_summary_2.tsv")
tracks_summary_df <- read_tsv(tracks_summary_file, show_col_types = FALSE) %>%
mutate(
across(contains("id"), as.character),
divided = as.logical(divided),
across(contains("filamented_"), ~factor(
.x,
levels = c(FALSE, TRUE),
labels = c("Not filamented", "Filamented")
)
),
across(contains("survived_"), ~factor(
.x,
levels = c(FALSE, TRUE),
labels = c("Not survived", "Survived")
)
),
cell_status = paste0(filamented_id, " - ", survived_strict) %>%
factor()
) %>%
relocate(where(is.character), where(is.factor), where(is.logical)) %>%
glimpse()## Rows: 12,566
## Columns: 26
## $ experiment_id <chr> "Chromosome", "Chromosome"…
## $ id <chr> "xy01_1_10.008-34.000", "x…
## $ filamented_id <fct> Filamented, Filamented, Fi…
## $ survived_strict <fct> Survived, Survived, Surviv…
## $ survived_soft <fct> Survived, Survived, Surviv…
## $ cell_status <fct> Filamented - Survived, Fil…
## $ divided <lgl> TRUE, TRUE, TRUE, TRUE, TR…
## $ filamentation_threshold <dbl> 38.94111, 38.94111, 38.941…
## $ lived_time <dbl> 110, 100, 110, 110, 110, 1…
## $ centered_antibiotic_start_time <dbl> 60, 60, 60, 60, 60, 60, 60…
## $ centered_antibiotic_end_time <dbl> 100, 100, 100, 100, 100, 1…
## $ centered_experiment_end_time <dbl> 150, 150, 150, 150, 150, 1…
## $ sos_at_time <dbl> 20, 10, 30, 70, 70, 70, 20…
## $ dead_or_missing_at_time <dbl> 120, 120, 120, 120, 120, 1…
## $ initial_length <dbl> 25.84965, 41.41517, 29.758…
## $ initial_gfp <dbl> 0.9977303, 0.9980406, 0.99…
## $ initial_ds_red <dbl> 2.987506, 2.987671, 2.9844…
## $ end_length <dbl> 36.53545, 85.05345, 89.147…
## $ end_gfp <dbl> 0.9965211, 0.9976678, 0.99…
## $ end_ds_red <dbl> 2.983707, 2.983269, 2.9825…
## $ sos_length <dbl> 41.41517, 41.41517, 40.578…
## $ sos_gfp <dbl> 0.9980406, 0.9980406, 0.99…
## $ sos_ds_red <dbl> 2.987671, 2.987671, 2.9839…
## $ n_divisons <dbl> 2, 1, 4, 2, 2, 2, 2, 1, 1,…
## $ time_last_division <dbl> 60, 10, 70, 80, 80, 80, 50…
## $ time_since_last_divison_to_experiment_start <dbl> 50, 50, 10, 50, 50, 50, 10…
theme_set(
theme_bw() +
theme(
legend.position = "top",
strip.background = element_blank()
)
)parse_metrics_column <- function(.data, metric_column) {
.data %>%
mutate(
{{ metric_column }} := str_remove(
string = {{ metric_column }},
pattern = "_(.+)"
) %>%
factor(
levels = c("initial", "sos", "end"),
labels = c("Initial", "SOS", "End")
)
)
}tracks_summary_df %>%
count(experiment_id, cell_status) %>%
group_by(experiment_id) %>%
mutate(
percentage = n / sum(n) * 100,
ymax = cumsum(percentage),
ymin = c(0, head(ymax, -1)),
labels = paste0(format(percentage, digits = 2), "%"),
labels_position = (ymax + ymin) / 2,
total_label = paste0("Total:\n", format(sum(n), big.mark = ","), " cells")
) %>%
ungroup() %>%
identity() %>%
ggplot(
aes(
ymin = ymin,
ymax = ymax,
xmin = 3,
xmax = 4
)
) +
geom_rect(
size = 1.5,
color = "white",
aes(fill = cell_status)
) +
geom_label(
x = 2,
aes(
y = labels_position,
label = labels
),
label.size = NA,
size = 3.5
) +
geom_text(aes(x = -Inf, y = -Inf, label = total_label), hjust = 0.5, vjust = 0.5) +
facet_grid(. ~ experiment_id) +
coord_polar(theta = "y") +
xlim(c(-1, 4)) +
guides(
fill = guide_legend(ncol = 2)
) +
theme_void() +
theme(
legend.position = "bottom"
) +
labs(
fill = "Cell status"
) +
NULL# tracks_summary_df %>%
# select(-sos_gfp) %>%
# pivot_longer(
# cols = contains("gfp"),
# names_to = "metric",
# values_to = "value"
# ) %>%
# parse_metrics_column(metric) %>%
# filter(!is.na(value)) %>%
# ggplot(aes(x = value, color = cell_status, fill = cell_status)) +
# geom_density(aes(y = ..scaled..), alpha = 1 / 4) +
# facet_grid(metric ~ experiment_id) +
# scale_y_continuous(
# labels = scales::percent
# ) +
# theme(
# panel.spacing.y = unit(0.5, "lines")
# ) +
# guides(
# color = guide_legend(ncol = 2),
# fill = guide_legend(ncol = 2)
# ) +
# labs(
# x = "Normalized mean GFP (log10)",
# y = "Scaled density",
# color = "Cell status",
# fill = "Cell status"
# ) +
# NULLtracks_summary_df %>%
select(-sos_gfp) %>%
pivot_longer(
cols = contains("gfp"),
names_to = "metric",
values_to = "value"
) %>%
parse_metrics_column(metric) %>%
filter(!is.na(value)) %>%
identity() %>%
ggplot(aes(x = cell_status, y = value, fill = cell_status)) +
stat_halfeye() +
stat_summary(fun=median, geom="line", aes(group=1)) +
stat_compare_means(method = "anova", label.y.npc = 0.9) + # Add global annova p-value
stat_compare_means(label = "p.signif", method = "t.test", ref.group = "Filamented - Survived", hide.ns = TRUE, label.y.npc = 0.8) +
stat_gradientinterval(position = "dodge", fill_type = "segments") +
facet_grid(experiment_id ~ metric) +
guides(
color = guide_legend(ncol = 2),
fill = guide_legend(ncol = 2)
) +
theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
) +
labs(
fill = "Cell status",
y = "GFP value"
) +
NULL# tracks_summary_df %>%
# select(-sos_length) %>%
# pivot_longer(
# cols = contains("length"),
# names_to = "metric",
# values_to = "value"
# ) %>%
# parse_metrics_column(metric) %>%
# filter(!is.na(value)) %>%
# ggplot(aes(x = value, color = cell_status, fill = cell_status)) +
# geom_density(aes(y = ..scaled..), alpha = 1 / 4) +
# facet_grid(metric ~ experiment_id) +
# scale_y_continuous(
# labels = scales::percent
# ) +
# coord_cartesian(xlim = c(0, 150)) +
# theme(
# panel.spacing.y = unit(0.5, "lines")
# ) +
# guides(
# color = guide_legend(ncol = 2),
# fill = guide_legend(ncol = 2)
# ) +
# labs(
# x = "Cell length",
# y = "Scaled density",
# color = "Cell status",
# fill = "Cell status"
# ) +
# NULLtracks_summary_df %>%
select(-sos_length) %>%
pivot_longer(
cols = contains("length"),
names_to = "metric",
values_to = "value"
) %>%
parse_metrics_column(metric) %>%
filter(!is.na(value)) %>%
identity() %>%
ggplot(aes(x = cell_status, y = value, fill = cell_status)) +
stat_halfeye() +
stat_summary(fun=median, geom="line", aes(group=1)) +
stat_compare_means(method = "anova", label.y.npc = 0.4, label.x.npc = 0.5) + # Add global annova p-value
stat_compare_means(label = "p.signif", method = "t.test", ref.group = "Filamented - Survived", hide.ns = TRUE, label.y.npc = 0.3) +
stat_gradientinterval(position = "dodge", fill_type = "segments") +
facet_grid(experiment_id ~ metric) +
coord_cartesian(ylim = c(0, 150)) +
guides(
color = guide_legend(ncol = 2),
fill = guide_legend(ncol = 2)
) +
theme(
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
) +
labs(
fill = "Cell status",
y = "Length value"
) +
NULLtracks_summary_df %>%
ggplot(aes(x = initial_gfp, y = initial_length, color = cell_status)) +
geom_point(alpha = 1/20) +
facet_grid(~experiment_id) +
guides(
color = guide_legend(ncol = 2, override.aes = list(alpha = 1)),
fill = guide_legend(ncol = 2)
) +
labs(
x = "Initial normalized GFP (log10)",
y = "Initial length",
color = "Cell status"
) +
NULLtracks_summary_df %>%
ggplot(aes(x = end_gfp - initial_gfp, y = end_length - initial_length, color = cell_status)) +
geom_point(alpha = 1/20) +
facet_grid(~experiment_id) +
guides(
color = guide_legend(ncol = 2, override.aes = list(alpha = 1)),
fill = guide_legend(ncol = 2)
) +
labs(
x = "End normalized GFP - Initial normalized GFP (log10)",
y = "End length - Initial length",
color = "Cell status"
) +
NULLtracks_summary_df %>%
ggplot(aes(x =factor(lived_time), fill = cell_status)) +
geom_bar(position = "fill", stat = "count", width = 1) +
facet_grid(experiment_id ~ .) +
scale_x_discrete(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0), labels = scales::percent) +
guides(
color = guide_legend(ncol = 2),
fill = guide_legend(ncol = 2)
) +
theme(
panel.spacing.y = unit(0.9, "lines"),
panel.grid = element_blank()
) +
labs(
x = "Cell life time",
y = "Percentage of cells",
fill = "Cell status"
) +
NULLtracks_summary_df %>%
select(experiment_id, id, lived_time, filamented_id, survived_strict, contains("gfp")) %>%
rename(survived = contains("survived")) %>%
pivot_longer(
cols = contains("gfp"),
names_to = "metric"
) %>%
filter(!is.na(value)) %>%
group_by(experiment_id, lived_time, filamented_id, survived, metric) %>%
summarize(
value = mean(value),
.groups = "drop"
) %>%
parse_metrics_column(metric) %>%
identity() %>%
ggplot(aes(x = lived_time, y = value)) +
geom_line(aes(group = lived_time)) +
geom_point(aes(color = metric, shape = survived)) +
facet_grid(filamented_id ~ experiment_id) +
scale_color_hue(direction = -1, h.start = 90) +
scale_fill_hue(direction = -1, h.start = 90) +
labs(
x = "Cell life time",
y = "Average normalized GFP value",
color = "Length type",
shape = "Cell status"
) +
theme(
panel.spacing.x = unit(0.9, "lines"),
) +
NULLtracks_summary_df %>%
select(experiment_id, id, lived_time, filamented_id, survived_strict, filamentation_threshold, contains("length")) %>%
rename(survived = contains("survived")) %>%
pivot_longer(
cols = contains("length"),
names_to = "metric"
) %>%
filter(!is.na(value)) %>%
group_by(experiment_id, lived_time, filamented_id, survived, metric) %>%
summarize(
filamentation_threshold = first(filamentation_threshold),
value = mean(value),
.groups = "drop"
) %>%
mutate(
lived_time = factor(lived_time),
) %>%
parse_metrics_column(metric) %>%
identity() %>%
ggplot(aes(x = lived_time, y = value)) +
geom_hline(aes(yintercept = filamentation_threshold), linetype = "dashed", alpha = 1/2) +
geom_line(aes(group = lived_time)) +
geom_point(aes(color = metric, shape = survived)) +
facet_grid(experiment_id ~ filamented_id) +
scale_color_hue(direction = -1, h.start = 90) +
scale_fill_hue(direction = -1, h.start = 90) +
labs(
x = "Cell life time",
y = "Average length value",
color = "Length type",
shape = "Cell status"
) +
theme(
panel.spacing.x = unit(0.9, "lines"),
) +
NULLtracks_summary_df %>%
group_by(experiment_id, cell_status) %>%
summarise(
n = median(n_divisons),
.groups = "drop"
) %>%
ggplot(aes(x = fct_reorder(cell_status, n), y = n, color = cell_status, fill = cell_status)) +
geom_point() +
geom_segment(aes(xend = cell_status, y=0, yend = n)) +
facet_grid(. ~ experiment_id) +
theme(
panel.grid.major = element_blank(),
legend.position = "none"
) +
labs(
x = "Cell status",
y = "Median number of division"
) +
coord_flip() +
NULLtracks_summary_df %>%
filter(!is.na(time_since_last_divison_to_experiment_start)) %>%
ggplot(aes(x = factor(time_since_last_divison_to_experiment_start), fill = cell_status)) +
geom_bar(position = "dodge", stat = "count", color = "white") +
facet_grid(experiment_id ~ ., scales = "free_y") +
#scale_x_discrete(expand = c(0, 0)) +
#scale_y_continuous(expand = c(0, 0)) +
guides(
color = guide_legend(ncol = 2),
fill = guide_legend(ncol = 2)
) +
theme(
panel.spacing.y = unit(0.9, "lines"),
panel.grid.major = element_blank()
) +
labs(
x = "Time since last division to experiment start",
y = "Count of cells",
fill = "Cell status"
) +
NULLexperiment_datasets <- tracks_summary_df %>%
select(experiment_id, cell_status, contains("length"), contains("gfp"), -contains("sos"), -contains("end")) %>%
group_by(experiment_id) %>%
{
grouped_data <- .
group_split(grouped_data) %>%
set_names(nm = group_keys(grouped_data) %>% pull())
} %>%
map(select, -experiment_id) %>%
identity()
chromosome_df <- experiment_datasets$Chromosome
plasmid_df <- experiment_datasets$Plasmidc_pca_rec <- recipe(cell_status ~ ., data = chromosome_df) %>%
step_naomit(all_predictors()) %>%
step_normalize(all_predictors()) %>%
step_pca(all_predictors())
c_pca_prep <- prep(c_pca_rec)
c_pca_prep## Data Recipe
##
## Inputs:
##
## role #variables
## outcome 1
## predictor 2
##
## Training data contained 893 data points and no missing data.
##
## Operations:
##
## Removing rows with NA values in all_predictors()
## Centering and scaling for initial_length, initial_gfp [trained]
## PCA extraction with initial_length, initial_gfp [trained]
c_tidied_pca <- tidy(c_pca_prep, 3)
c_tidied_pca %>%
filter(component %in% paste0("PC", 1:5)) %>%
mutate(component = fct_inorder(component)) %>%
ggplot(aes(value, terms, fill = terms)) +
geom_col(show.legend = FALSE) +
facet_wrap(~component, nrow = 1) +
labs(y = NULL)c_pca_prep %>%
juice() %>%
ggplot(aes(x = PC1, y = PC2, color = cell_status)) +
geom_vline(xintercept = 0) +
geom_hline(yintercept = 0) +
geom_jitter(size = 0.7) +
guides(
color = guide_legend(ncol = 2, override.aes = list(alpha = 1)),
fill = guide_legend(ncol = 2)
) +
labs(
color = "Cell status"
) +
NULLp_pca_rec <- recipe(cell_status ~ ., data = plasmid_df) %>%
step_naomit(all_predictors()) %>%
step_normalize(all_predictors()) %>%
step_pca(all_predictors())
p_pca_prep <- prep(p_pca_rec)
p_pca_prep## Data Recipe
##
## Inputs:
##
## role #variables
## outcome 1
## predictor 2
##
## Training data contained 11673 data points and no missing data.
##
## Operations:
##
## Removing rows with NA values in all_predictors()
## Centering and scaling for initial_length, initial_gfp [trained]
## PCA extraction with initial_length, initial_gfp [trained]
p_tidied_pca <- tidy(p_pca_prep, 3)
p_tidied_pca %>%
filter(component %in% paste0("PC", 1:5)) %>%
mutate(component = fct_inorder(component)) %>%
ggplot(aes(value, terms, fill = terms)) +
geom_col(show.legend = FALSE) +
facet_wrap(~component, nrow = 1) +
labs(y = NULL)p_pca_prep %>%
juice() %>%
ggplot(aes(x = PC1, y = PC2, color = cell_status)) +
geom_vline(xintercept = 0) +
geom_hline(yintercept = 0) +
geom_jitter(size = 0.1, alpha = 1/3) +
scale_x_continuous(limits = c(NA, 5)) +
scale_y_continuous(limits = c(NA, 7)) +
guides(
color = guide_legend(ncol = 2, override.aes = list(alpha = 1, size = 1)),
fill = guide_legend(ncol = 2)
) +
labs(
color = "Cell status"
) +
NULLp_umap_rec <- recipe(cell_status ~ ., data = plasmid_df) %>%
step_naomit(all_predictors()) %>%
step_normalize(all_predictors()) %>%
step_umap(all_predictors())
p_umap_prep <- prep(p_umap_rec)
p_umap_prep## Data Recipe
##
## Inputs:
##
## role #variables
## outcome 1
## predictor 2
##
## Training data contained 11673 data points and no missing data.
##
## Operations:
##
## Removing rows with NA values in all_predictors()
## Centering and scaling for initial_length, initial_gfp [trained]
## UMAP embedding for initial_length, initial_gfp [trained]
juice(p_umap_prep) %>%
ggplot(aes(umap_1, umap_2, label )) +
geom_point(aes(color = cell_status), alpha = 0.7, size = 2) +
labs(
x = "UMAP 1",
y = "UMAP 2",
color = "Cell status"
) +
guides(
color = guide_legend(ncol = 2),
fill = guide_legend(ncol = 2)
) +
NULLc_umap_rec <- recipe(cell_status ~ ., data = chromosome_df) %>%
step_naomit(all_predictors()) %>%
step_normalize(all_predictors()) %>%
step_umap(all_predictors())
c_umap_prep <- prep(c_umap_rec)
c_umap_prep## Data Recipe
##
## Inputs:
##
## role #variables
## outcome 1
## predictor 2
##
## Training data contained 893 data points and no missing data.
##
## Operations:
##
## Removing rows with NA values in all_predictors()
## Centering and scaling for initial_length, initial_gfp [trained]
## UMAP embedding for initial_length, initial_gfp [trained]
juice(c_umap_prep) %>%
ggplot(aes(umap_1, umap_2, label )) +
geom_point(aes(color = cell_status), alpha = 0.7, size = 2) +
guides(
color = guide_legend(ncol = 2),
fill = guide_legend(ncol = 2)
) +
labs(
x = "UMAP 1",
y = "UMAP 2",
color = "Cell status"
)chromosome_df %>%
select(cell_status, where(is.numeric)) %>%
rename(`Initial GFP` = initial_gfp, `Initial length` = initial_length) %>%
ggpairs(
data = .,
columns = 2:ncol(.),
aes(color = cell_status, alpha = 1/1000)
)plasmid_df %>%
select(cell_status, where(is.numeric)) %>%
rename(`Initial GFP` = initial_gfp, `Initial length` = initial_length) %>%
ggpairs(
data = .,
columns = 2:ncol(.),
aes(color = cell_status, alpha = 1/1000),
#upper = list(continuous = "points")
)